// Tencent is pleased to support the open source community by making RapidJSON available.
// 
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed 
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
// specific language governing permissions and limitations under the License.

#ifndef RAPIDJSON_ENCODEDSTREAM_H_
#define RAPIDJSON_ENCODEDSTREAM_H_

#include "rapidjson.h"

#ifdef __GNUC__
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(effc++)
#endif

RAPIDJSON_NAMESPACE_BEGIN

//! Input byte stream wrapper with a statically bound encoding.
/*!
    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
    \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
*/
    template<typename Encoding, typename InputByteStream>
    class EncodedInputStream {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
    public:
        typedef typename Encoding::Ch Ch;

        EncodedInputStream(InputByteStream &is) : is_(is) {
            current_ = Encoding::TakeBOM(is_);
        }

        Ch Peek() const { return current_; }

        Ch Take() {
            Ch c = current_;
            current_ = Encoding::Take(is_);
            return c;
        }

        size_t Tell() const { return is_.Tell(); }

        // Not implemented
        void Put(Ch) { RAPIDJSON_ASSERT(false); }

        void Flush() { RAPIDJSON_ASSERT(false); }

        Ch *PutBegin() {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        size_t PutEnd(Ch *) {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

    private:
        EncodedInputStream(const EncodedInputStream &);

        EncodedInputStream &operator=(const EncodedInputStream &);

        InputByteStream &is_;
        Ch current_;
    };

//! Output byte stream wrapper with statically bound encoding.
/*!
    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
    \tparam InputByteStream Type of input byte stream. For example, FileWriteStream.
*/
    template<typename Encoding, typename OutputByteStream>
    class EncodedOutputStream {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
    public:
        typedef typename Encoding::Ch Ch;

        EncodedOutputStream(OutputByteStream &os, bool putBOM = true) : os_(os) {
            if (putBOM)
                Encoding::PutBOM(os_);
        }

        void Put(Ch c) { Encoding::Put(os_, c); }

        void Flush() { os_.Flush(); }

        // Not implemented
        Ch Peek() const { RAPIDJSON_ASSERT(false); }

        Ch Take() { RAPIDJSON_ASSERT(false); }

        size_t Tell() const {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        Ch *PutBegin() {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        size_t PutEnd(Ch *) {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

    private:
        EncodedOutputStream(const EncodedOutputStream &);

        EncodedOutputStream &operator=(const EncodedOutputStream &);

        OutputByteStream &os_;
    };

#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x

//! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
/*!
    \tparam CharType Type of character for reading.
    \tparam InputByteStream type of input byte stream to be wrapped.
*/
    template<typename CharType, typename InputByteStream>
    class AutoUTFInputStream {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
    public:
        typedef CharType Ch;

        //! Constructor.
        /*!
            \param is input stream to be wrapped.
            \param type UTF encoding type if it is not detected from the stream.
        */
        AutoUTFInputStream(InputByteStream &is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
            RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
            DetectType();
            static const TakeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Take)};
            takeFunc_ = f[type_];
            current_ = takeFunc_(*is_);
        }

        UTFType GetType() const { return type_; }

        bool HasBOM() const { return hasBOM_; }

        Ch Peek() const { return current_; }

        Ch Take() {
            Ch c = current_;
            current_ = takeFunc_(*is_);
            return c;
        }

        size_t Tell() const { return is_->Tell(); }

        // Not implemented
        void Put(Ch) { RAPIDJSON_ASSERT(false); }

        void Flush() { RAPIDJSON_ASSERT(false); }

        Ch *PutBegin() {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        size_t PutEnd(Ch *) {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

    private:
        AutoUTFInputStream(const AutoUTFInputStream &);

        AutoUTFInputStream &operator=(const AutoUTFInputStream &);

        // Detect encoding type with BOM or RFC 4627
        void DetectType() {
            // BOM (Byte Order Mark):
            // 00 00 FE FF  UTF-32BE
            // FF FE 00 00  UTF-32LE
            // FE FF        UTF-16BE
            // FF FE        UTF-16LE
            // EF BB BF     UTF-8

            const unsigned char *c = (const unsigned char *) is_->Peek4();
            if (!c)
                return;

            unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
            hasBOM_ = false;
            if (bom == 0xFFFE0000) {
                type_ = kUTF32BE;
                hasBOM_ = true;
                is_->Take();
                is_->Take();
                is_->Take();
                is_->Take();
            }
            else if (bom == 0x0000FEFF) {
                type_ = kUTF32LE;
                hasBOM_ = true;
                is_->Take();
                is_->Take();
                is_->Take();
                is_->Take();
            }
            else if ((bom & 0xFFFF) == 0xFFFE) {
                type_ = kUTF16BE;
                hasBOM_ = true;
                is_->Take();
                is_->Take();
            }
            else if ((bom & 0xFFFF) == 0xFEFF) {
                type_ = kUTF16LE;
                hasBOM_ = true;
                is_->Take();
                is_->Take();
            }
            else if ((bom & 0xFFFFFF) == 0xBFBBEF) {
                type_ = kUTF8;
                hasBOM_ = true;
                is_->Take();
                is_->Take();
                is_->Take();
            }

            // RFC 4627: Section 3
            // "Since the first two characters of a JSON text will always be ASCII
            // characters [RFC0020], it is possible to determine whether an octet
            // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
            // at the pattern of nulls in the first four octets."
            // 00 00 00 xx  UTF-32BE
            // 00 xx 00 xx  UTF-16BE
            // xx 00 00 00  UTF-32LE
            // xx 00 xx 00  UTF-16LE
            // xx xx xx xx  UTF-8

            if (!hasBOM_) {
                unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
                switch (pattern) {
                    case 0x08:
                        type_ = kUTF32BE;
                        break;
                    case 0x0A:
                        type_ = kUTF16BE;
                        break;
                    case 0x01:
                        type_ = kUTF32LE;
                        break;
                    case 0x05:
                        type_ = kUTF16LE;
                        break;
                    case 0x0F:
                        type_ = kUTF8;
                        break;
                    default:
                        break; // Use type defined by user.
                }
            }

            // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
            if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
            if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
        }

        typedef Ch (*TakeFunc)(InputByteStream &is);

        InputByteStream *is_;
        UTFType type_;
        Ch current_;
        TakeFunc takeFunc_;
        bool hasBOM_;
    };

//! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
/*!
    \tparam CharType Type of character for writing.
    \tparam InputByteStream type of output byte stream to be wrapped.
*/
    template<typename CharType, typename OutputByteStream>
    class AutoUTFOutputStream {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
    public:
        typedef CharType Ch;

        //! Constructor.
        /*!
            \param os output stream to be wrapped.
            \param type UTF encoding type.
            \param putBOM Whether to write BOM at the beginning of the stream.
        */
        AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM) : os_(&os), type_(type) {
            RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);

            // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
            if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
            if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);

            static const PutFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Put)};
            putFunc_ = f[type_];

            if (putBOM)
                PutBOM();
        }

        UTFType GetType() const { return type_; }

        void Put(Ch c) { putFunc_(*os_, c); }

        void Flush() { os_->Flush(); }

        // Not implemented
        Ch Peek() const { RAPIDJSON_ASSERT(false); }

        Ch Take() { RAPIDJSON_ASSERT(false); }

        size_t Tell() const {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        Ch *PutBegin() {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

        size_t PutEnd(Ch *) {
            RAPIDJSON_ASSERT(false);
            return 0;
        }

    private:
        AutoUTFOutputStream(const AutoUTFOutputStream &);

        AutoUTFOutputStream &operator=(const AutoUTFOutputStream &);

        void PutBOM() {
            typedef void (*PutBOMFunc)(OutputByteStream &);
            static const PutBOMFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(PutBOM)};
            f[type_](*os_);
        }

        typedef void (*PutFunc)(OutputByteStream &, Ch);

        OutputByteStream *os_;
        UTFType type_;
        PutFunc putFunc_;
    };

#undef RAPIDJSON_ENCODINGS_FUNC

RAPIDJSON_NAMESPACE_END

#ifdef __GNUC__
RAPIDJSON_DIAG_POP
#endif

#endif // RAPIDJSON_FILESTREAM_H_
